Análisis Descriptivo

# librerías necesarias para implementar las funciones
library(readxl)
library(glue)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggmosaic)
library(ggridges)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ data.table::first()   masks dplyr::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ data.table::last()    masks dplyr::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ lubridate::second()   masks data.table::second()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## The following objects are masked from 'package:data.table':
## 
##     first, last
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(xtable)
library(here)
## here() starts at /Users/sofiabocker/Desktop/universidad/UCR/Actuariales/Cuarto año/I Ciclo/Estadística Actuarial I/Proyecto/cod
library(skimr) 
library(kableExtra) 
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(rcompanion)
library(RCurl)
## 
## Attaching package: 'RCurl'
## 
## The following object is masked from 'package:tidyr':
## 
##     complete
# importar base de datos 

# Link de la base de datos en github
url <- "https://raw.githubusercontent.com/sofiabocker/proyecto_ca_0303_g08/main/base_datos_alcohol.xlsx"

# descargar el archivo 
binary_data <- getBinaryURL(url)
temp_file <- tempfile(fileext = ".xlsx")
writeBin(binary_data, temp_file)

# leer el archivo en R
base_datos <- read_excel(temp_file)
## New names:
## • `` -> `...32`
## • `` -> `...33`
base_datos <- base_datos [, -32]
base_datos <- base_datos [, -32]
base_datos <- head(base_datos, -25)

# Comprimir las variables de 5 categorías en variables de tres categorías <

base_datos_clean <- base_datos %>%
  clean_names() %>%
  mutate(alcohol_weekdays = fct_collapse(
    alcohol_weekdays,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

#  Asegurarse que los datos se mantengan como characters

base_datos_clean$alcohol_weekdays <- as.character(base_datos_clean$alcohol_weekdays)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(alcohol_weekends = fct_collapse(
    alcohol_weekends,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

base_datos_clean$alcohol_weekends <- as.character(base_datos_clean$alcohol_weekends)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(health_status = fct_collapse(
    health_status ,
    Poor = c("Poor", "Very Poor"),
    Good = c("Very Good", "Good"),
    Fair = "Fair"
  ))

base_datos_clean$health_status <- as.character(base_datos_clean$health_status)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(good_family_relationship = fct_collapse(
    good_family_relationship,
    Poor = c("Poor", "Very Poor"),
    Good = c("Excellent", "Good"),
    Fair = "Fair"
  ))

base_datos_clean$good_family_relationship <- as.character(base_datos_clean$good_family_relationship)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(free_time_after_school = fct_collapse(
    free_time_after_school,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

base_datos_clean$free_time_after_school <- as.character(base_datos_clean$free_time_after_school)

base_datos_clean <- base_datos_clean %>%
  clean_names() %>%
  mutate(time_with_friends = fct_collapse(
    time_with_friends,
    Low = c("Low", "Very Low"),
    High = c("High", "Very High"),
    Moderate = "Moderate"
  ))

base_datos_clean$time_with_friends <- as.character(base_datos_clean$time_with_friends)
# muestra la estructura de los datos
str <- str(base_datos_clean) 
## tibble [649 × 31] (S3: tbl_df/tbl/data.frame)
##  $ school                      : chr [1:649] "Gabriel Pereira" "Gabriel Pereira" "Gabriel Pereira" "Gabriel Pereira" ...
##  $ gender                      : chr [1:649] "Female" "Female" "Female" "Female" ...
##  $ age                         : num [1:649] 18 17 15 15 16 16 16 17 15 15 ...
##  $ housing_type                : chr [1:649] "Urban" "Urban" "Urban" "Urban" ...
##  $ family_size                 : chr [1:649] "Above 3" "Above 3" "Up to 3" "Above 3" ...
##  $ parental_status             : chr [1:649] "Separated" "Living Together" "Living Together" "Living Together" ...
##  $ mother_education            : chr [1:649] "Higher Education" "Primary School" "Primary School" "Higher Education" ...
##  $ father_education            : chr [1:649] "Higher Education" "Primary School" "Primary School" "Lower Secondary School" ...
##  $ mother_work                 : chr [1:649] "Homemaker" "Homemaker" "Homemaker" "Health" ...
##  $ father_work                 : chr [1:649] "Teacher" "other" "other" "Services" ...
##  $ reason_school_choice        : chr [1:649] "Course Preference" "Course Preference" "Other" "Near Home" ...
##  $ legal_responsibility        : chr [1:649] "Mother" "Father" "Mother" "Mother" ...
##  $ commute_time                : chr [1:649] "15 to 30 min" "Up to 15 min" "Up to 15 min" "Up to 15 min" ...
##  $ weekly_study_time           : chr [1:649] "2 to 5h" "2 to 5h" "2 to 5h" "5 to 10h" ...
##  $ extra_educational_support   : chr [1:649] "Yes" "No" "Yes" "No" ...
##  $ parental_educational_support: chr [1:649] "No" "Yes" "No" "Yes" ...
##  $ private_tutoring            : chr [1:649] "No" "No" "No" "No" ...
##  $ extracurricular_activities  : chr [1:649] "No" "No" "No" "Yes" ...
##  $ attended_daycare            : chr [1:649] "Yes" "No" "Yes" "Yes" ...
##  $ desire_graduate_education   : chr [1:649] "Yes" "Yes" "Yes" "Yes" ...
##  $ has_internet                : chr [1:649] "No" "Yes" "Yes" "Yes" ...
##  $ is_dating                   : chr [1:649] "No" "No" "No" "Yes" ...
##  $ good_family_relationship    : chr [1:649] "Good" "Good" "Good" "Fair" ...
##  $ free_time_after_school      : chr [1:649] "Moderate" "Moderate" "Moderate" "Low" ...
##  $ time_with_friends           : chr [1:649] "High" "Moderate" "Low" "Low" ...
##  $ alcohol_weekdays            : chr [1:649] "Low" "Low" "Low" "Low" ...
##  $ alcohol_weekends            : chr [1:649] "Low" "Low" "Moderate" "Low" ...
##  $ health_status               : chr [1:649] "Fair" "Fair" "Fair" "Good" ...
##  $ school_absence              : num [1:649] 4 2 6 0 0 6 0 2 0 0 ...
##  $ grade_1st_semester          : num [1:649] 0 9 12 14 11 12 13 10 15 12 ...
##  $ grade_2nd_semester          : num [1:649] 11 11 13 14 13 12 12 13 16 12 ...
# resumen general de la base de datos

summary(base_datos_clean)
##     school             gender               age        housing_type      
##  Length:649         Length:649         Min.   :15.00   Length:649        
##  Class :character   Class :character   1st Qu.:16.00   Class :character  
##  Mode  :character   Mode  :character   Median :17.00   Mode  :character  
##                                        Mean   :16.74                     
##                                        3rd Qu.:18.00                     
##                                        Max.   :22.00                     
##  family_size        parental_status    mother_education   father_education  
##  Length:649         Length:649         Length:649         Length:649        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  mother_work        father_work        reason_school_choice
##  Length:649         Length:649         Length:649          
##  Class :character   Class :character   Class :character    
##  Mode  :character   Mode  :character   Mode  :character    
##                                                            
##                                                            
##                                                            
##  legal_responsibility commute_time       weekly_study_time 
##  Length:649           Length:649         Length:649        
##  Class :character     Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character  
##                                                            
##                                                            
##                                                            
##  extra_educational_support parental_educational_support private_tutoring  
##  Length:649                Length:649                   Length:649        
##  Class :character          Class :character             Class :character  
##  Mode  :character          Mode  :character             Mode  :character  
##                                                                           
##                                                                           
##                                                                           
##  extracurricular_activities attended_daycare   desire_graduate_education
##  Length:649                 Length:649         Length:649               
##  Class :character           Class :character   Class :character         
##  Mode  :character           Mode  :character   Mode  :character         
##                                                                         
##                                                                         
##                                                                         
##  has_internet        is_dating         good_family_relationship
##  Length:649         Length:649         Length:649              
##  Class :character   Class :character   Class :character        
##  Mode  :character   Mode  :character   Mode  :character        
##                                                                
##                                                                
##                                                                
##  free_time_after_school time_with_friends  alcohol_weekdays  
##  Length:649             Length:649         Length:649        
##  Class :character       Class :character   Class :character  
##  Mode  :character       Mode  :character   Mode  :character  
##                                                              
##                                                              
##                                                              
##  alcohol_weekends   health_status      school_absence   grade_1st_semester
##  Length:649         Length:649         Min.   : 0.000   Min.   : 0.0      
##  Class :character   Class :character   1st Qu.: 0.000   1st Qu.:10.0      
##  Mode  :character   Mode  :character   Median : 2.000   Median :11.0      
##                                        Mean   : 3.659   Mean   :11.4      
##                                        3rd Qu.: 6.000   3rd Qu.:13.0      
##                                        Max.   :32.000   Max.   :19.0      
##  grade_2nd_semester
##  Min.   : 0.00     
##  1st Qu.:10.00     
##  Median :11.00     
##  Mean   :11.57     
##  3rd Qu.:13.00     
##  Max.   :19.00
# explora data

skimr::skim(base_datos_clean) 
Data summary
Name base_datos_clean
Number of rows 649
Number of columns 31
_______________________
Column type frequency:
character 27
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
school 0 1 15 20 0 2 0
gender 0 1 4 6 0 2 0
housing_type 0 1 5 5 0 2 0
family_size 0 1 7 7 0 2 0
parental_status 0 1 9 15 0 2 0
mother_education 0 1 4 22 0 5 0
father_education 0 1 4 22 0 5 0
mother_work 0 1 5 9 0 5 0
father_work 0 1 5 9 0 5 0
reason_school_choice 0 1 5 17 0 4 0
legal_responsibility 0 1 5 6 0 3 0
commute_time 0 1 12 12 0 4 0
weekly_study_time 0 1 7 13 0 4 0
extra_educational_support 0 1 2 3 0 2 0
parental_educational_support 0 1 2 3 0 2 0
private_tutoring 0 1 2 3 0 2 0
extracurricular_activities 0 1 2 3 0 2 0
attended_daycare 0 1 2 3 0 2 0
desire_graduate_education 0 1 2 3 0 2 0
has_internet 0 1 2 3 0 2 0
is_dating 0 1 2 3 0 2 0
good_family_relationship 0 1 4 4 0 3 0
free_time_after_school 0 1 3 8 0 3 0
time_with_friends 0 1 3 8 0 3 0
alcohol_weekdays 0 1 3 8 0 3 0
alcohol_weekends 0 1 3 8 0 3 0
health_status 0 1 4 4 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 16.74 1.22 15 16 17 18 22 ▇▅▅▁▁
school_absence 0 1 3.66 4.64 0 0 2 6 32 ▇▂▁▁▁
grade_1st_semester 0 1 11.40 2.75 0 10 11 13 19 ▁▂▇▇▁
grade_2nd_semester 0 1 11.57 2.91 0 10 11 13 19 ▁▁▇▇▂

Variables cuantitativas

# crear un dataframe con sólo las columnas con valores numéricos
base_datos_num <- base_datos_clean %>% select_if(is.numeric)
base_datos_num
## # A tibble: 649 × 4
##      age school_absence grade_1st_semester grade_2nd_semester
##    <dbl>          <dbl>              <dbl>              <dbl>
##  1    18              4                  0                 11
##  2    17              2                  9                 11
##  3    15              6                 12                 13
##  4    15              0                 14                 14
##  5    16              0                 11                 13
##  6    16              6                 12                 12
##  7    16              0                 13                 12
##  8    17              2                 10                 13
##  9    15              0                 15                 16
## 10    15              0                 12                 12
## # ℹ 639 more rows

Estadísticas más Específicas

# brinda estadísticas más específicas 
estadisticas <- stat.desc(base_datos_num)
estadisticas
##                       age school_absence grade_1st_semester grade_2nd_semester
## nbr.val      6.490000e+02    649.0000000        649.0000000        649.0000000
## nbr.null     0.000000e+00    244.0000000          1.0000000          7.0000000
## nbr.na       0.000000e+00      0.0000000          0.0000000          0.0000000
## min          1.500000e+01      0.0000000          0.0000000          0.0000000
## max          2.200000e+01     32.0000000         19.0000000         19.0000000
## range        7.000000e+00     32.0000000         19.0000000         19.0000000
## sum          1.086700e+04   2375.0000000       7398.0000000       7509.0000000
## median       1.700000e+01      2.0000000         11.0000000         11.0000000
## mean         1.674422e+01      3.6594761         11.3990755         11.5701079
## SE.mean      4.781608e-02      0.1821657          0.1077611          0.1143703
## CI.mean.0.95 9.389318e-02      0.3577064          0.2116031          0.2245812
## var          1.483859e+00     21.5366423          7.5364806          8.4892903
## std.dev      1.218138e+00      4.6407588          2.7452651          2.9136387
## coef.var     7.274973e-02      1.2681484          0.2408323          0.2518247

Histogramas

# crea un histograma para cada columna cuantitativa

lapply(names(base_datos_num), function(col_name) {
  col <- base_datos_num[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_histogram(binwidth = 1, fill = "blue") +
    labs(title = col_name, x = col_name, y = "Frequencia")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Densidad

# crea un gráfico de densidad para cada columna cuantitativa

lapply(names(base_datos_num), function(col_name) {
  col <- base_datos_num[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_density() +
    labs(x = col_name)
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Gráficos de Barra

# crear gráficos de barra para cada columna cuantitativa

lapply(names(base_datos_num), function(col_name) {
  col <- base_datos_num[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_bar(stat = "count", fill = "darkred") +  
    labs(title = col_name, x = col_name, y = "")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Variables cualitativas

# crear un dataframe con sólo las columnas de string
base_datos_str <- base_datos_clean %>% select_if(is.character)
base_datos_str
## # A tibble: 649 × 27
##    school       gender housing_type family_size parental_status mother_education
##    <chr>        <chr>  <chr>        <chr>       <chr>           <chr>           
##  1 Gabriel Per… Female Urban        Above 3     Separated       Higher Education
##  2 Gabriel Per… Female Urban        Above 3     Living Together Primary School  
##  3 Gabriel Per… Female Urban        Up to 3     Living Together Primary School  
##  4 Gabriel Per… Female Urban        Above 3     Living Together Higher Education
##  5 Gabriel Per… Female Urban        Above 3     Living Together High School     
##  6 Gabriel Per… Male   Urban        Up to 3     Living Together Higher Education
##  7 Gabriel Per… Male   Urban        Up to 3     Living Together Lower Secondary…
##  8 Gabriel Per… Female Urban        Above 3     Separated       Higher Education
##  9 Gabriel Per… Male   Urban        Up to 3     Separated       High School     
## 10 Gabriel Per… Male   Urban        Above 3     Living Together High School     
## # ℹ 639 more rows
## # ℹ 21 more variables: father_education <chr>, mother_work <chr>,
## #   father_work <chr>, reason_school_choice <chr>, legal_responsibility <chr>,
## #   commute_time <chr>, weekly_study_time <chr>,
## #   extra_educational_support <chr>, parental_educational_support <chr>,
## #   private_tutoring <chr>, extracurricular_activities <chr>,
## #   attended_daycare <chr>, desire_graduate_education <chr>, …

Gráficos de barra

# crear gráficos de barra para cada columna cualitativa

lapply(names(base_datos_str), function(col_name) {
  col <- base_datos_str[[col_name]]
  ggplot(data.frame(col), aes(x = col)) +
    geom_bar(stat = "count", fill = "darkred") +  
    labs(title = col_name, x = col_name, y = "")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

Covariaciones

Variables cualitativas y cuantitativas

# Relaciona la nota del primer semestre con la cantidad de alcohol consumida entre semana

ggplot(base_datos_clean, aes(x = grade_1st_semester, y = alcohol_weekdays, group = alcohol_weekdays)) + 
  geom_density_ridges()
## Picking joint bandwidth of 0.747

# Relaciona la nota del primer semestre con la cantidad de alcohol consumida en fin de semana

ggplot(base_datos_clean, aes(x = grade_1st_semester, y = alcohol_weekends, group = alcohol_weekends)) + 
  geom_density_ridges()
## Picking joint bandwidth of 0.822

# Relaciona la nota del segundo semestre con la cantidad de alcohol consumida entre semana

ggplot(base_datos_clean, aes(x = grade_2nd_semester, y = alcohol_weekdays, group = alcohol_weekdays)) + 
  geom_density_ridges()
## Picking joint bandwidth of 0.678

# Relaciona la nota del segundo semestre con la cantidad de alcohol consumida en fin de semana

ggplot(base_datos_clean, aes(x = grade_2nd_semester, y = alcohol_weekends, group = alcohol_weekends)) + 
  geom_density_ridges()
## Picking joint bandwidth of 0.856

# crear un mapa de calor
create_heatmap <- function(col_name) {
  count_data <- base_datos_str %>% count(alcohol_weekdays, !!sym(col_name))
  ggplot(count_data, aes(x = alcohol_weekdays, y = !!sym(col_name))) +
    geom_tile(aes(fill = n), color = "white") +
    scale_fill_gradient(low = "white", high = "blue") +
    labs(title = paste("Comparación de alcohol entre semana con", col_name),
         x = "Alcohol entre semana", y = col_name)
}

# aplicar la función a tods las columnas
heatmap_plots <- lapply(names(base_datos_str)[-which(names(base_datos_str) == "alcohol_weekdays")], create_heatmap)

print(heatmap_plots)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

# crear un mapa de calor
create_heatmap <- function(col_name) {
  count_data <- base_datos_str %>% count(alcohol_weekends, !!sym(col_name))
  ggplot(count_data, aes(x = alcohol_weekends, y = !!sym(col_name))) +
    geom_tile(aes(fill = n), color = "white") +
    scale_fill_gradient(low = "white", high = "blue") +
    labs(title = paste("Comparación de alcohol en fin de semana con", col_name),
         x = "Alcohol en fin de semana", y = col_name)
}

# aplicar la unción a todas las columnas
heatmap_plots <- lapply(names(base_datos_str)[-which(names(base_datos_str) == "alcohol_weekends")], create_heatmap)

print(heatmap_plots)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

Prototipo de Modelación

Tablas de contingencia Alcohol entre semana

# Crear tablas de contingencia para cada columna cualitativa y la de cantidad de alcohol entre semana

tablas_contingencias_1 <- lapply(base_datos_str, function(col) {
  table(col, base_datos_str$alcohol_weekdays)
})

print(tablas_contingencias_1)
## $school
##                       
## col                    High Low Moderate
##   Gabriel Pereira        22 379       22
##   Mousinho da Silveira   12 193       21
## 
## $gender
##         
## col      High Low Moderate
##   Female    9 363       11
##   Male     25 209       32
## 
## $housing_type
##        
## col     High Low Moderate
##   Rural   10 168       19
##   Urban   24 404       24
## 
## $family_size
##          
## col       High Low Moderate
##   Above 3   23 408       26
##   Up to 3   11 164       17
## 
## $parental_status
##                  
## col               High Low Moderate
##   Living Together   31 501       37
##   Separated          3  71        6
## 
## $mother_education
##                         
## col                      High Low Moderate
##   High School               8 118       13
##   Higher Education          9 155       11
##   Lower Secondary School    7 173        6
##   None                      0   5        1
##   Primary School           10 121       12
## 
## $father_education
##                         
## col                      High Low Moderate
##   High School               5 117        9
##   Higher Education          7 110       11
##   Lower Secondary School   11 188       10
##   None                      0   7        0
##   Primary School           11 150       13
## 
## $mother_work
##            
## col         High Low Moderate
##   Health       0  45        3
##   Homemaker    8 119        8
##   other       14 229       15
##   Services     9 118        9
##   Teacher      3  61        8
## 
## $father_work
##            
## col         High Low Moderate
##   Health       1  20        2
##   Homemaker    0  39        3
##   other       17 329       21
##   Services    14 150       17
##   Teacher      2  34        0
## 
## $reason_school_choice
##                    
## col                 High Low Moderate
##   Course Preference   13 258       14
##   Near Home           10 127       12
##   Other                7  56        9
##   Reputation           4 131        8
## 
## $legal_responsibility
##         
## col      High Low Moderate
##   Father    8 133       12
##   Mother   20 408       27
##   Other     6  31        4
## 
## $commute_time
##               
## col            High Low Moderate
##   15 to 30 min   11 189       13
##   30 min to 1h    4  42        8
##   More than 1h    3  12        1
##   Up to 15 min   16 329       21
## 
## $weekly_study_time
##                
## col             High Low Moderate
##   2 to 5h         14 278       13
##   5 to 10h         2  94        1
##   More than 10h    2  29        4
##   Up to 2h        16 171       25
## 
## $extra_educational_support
##      
## col   High Low Moderate
##   No    30 510       41
##   Yes    4  62        2
## 
## $parental_educational_support
##      
## col   High Low Moderate
##   No    12 215       24
##   Yes   22 357       19
## 
## $private_tutoring
##      
## col   High Low Moderate
##   No    31 539       40
##   Yes    3  33        3
## 
## $extracurricular_activities
##      
## col   High Low Moderate
##   No    14 296       24
##   Yes   20 276       19
## 
## $attended_daycare
##      
## col   High Low Moderate
##   No    10 109        9
##   Yes   24 463       34
## 
## $desire_graduate_education
##      
## col   High Low Moderate
##   No     8  55        6
##   Yes   26 517       37
## 
## $has_internet
##      
## col   High Low Moderate
##   No     5 135       11
##   Yes   29 437       32
## 
## $is_dating
##      
## col   High Low Moderate
##   No    14 364       32
##   Yes   20 208       11
## 
## $good_family_relationship
##       
## col    High Low Moderate
##   Fair    6  92        3
##   Good   24 440       33
##   Poor    4  40        7
## 
## $free_time_after_school
##           
## col        High Low Moderate
##   High       17 203       26
##   Low         7 135       10
##   Moderate   10 234        7
## 
## $time_with_friends
##           
## col        High Low Moderate
##   High       23 202       26
##   Low         4 182        7
##   Moderate    7 188       10
## 
## $alcohol_weekdays
##           
## col        High Low Moderate
##   High       34   0        0
##   Low         0 572        0
##   Moderate    0   0       43
## 
## $alcohol_weekends
##           
## col        High Low Moderate
##   High       26  74       32
##   Low         4 391        2
##   Moderate    4 107        9
## 
## $health_status
##       
## col    High Low Moderate
##   Fair    9 110        5
##   Good   19 310       28
##   Poor    6 152       10

Diagrama de mosaico

# crear una representación gráfica de las tablas de contingencia

lapply(seq_along(tablas_contingencias_1), function(i) {
  mosaicplot(tablas_contingencias_1[[i]],
              color = TRUE,
              xlab = "Alcohol entre semana",
              ylab = names(tablas_contingencias_1[[i]])[2], 
              main = paste("Alcohol entre Semana y", names(base_datos_str)[i][1]))
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
## 
## [[21]]
## NULL
## 
## [[22]]
## NULL
## 
## [[23]]
## NULL
## 
## [[24]]
## NULL
## 
## [[25]]
## NULL
## 
## [[26]]
## NULL
## 
## [[27]]
## NULL

Prueba Chi-Cuadrado

# aplicar la prueba de independencia de chi-cuadrado a cada tabla de contingencia

chi_cuadrado_1 <- lapply(tablas_contingencias_1, chisq.test)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi_cuadrado_1
## $school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.0191, df = 2, p-value = 0.134
## 
## 
## $gender
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 39.436, df = 2, p-value = 2.733e-09
## 
## 
## $housing_type
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.1675, df = 2, p-value = 0.1245
## 
## 
## $family_size
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.3978, df = 2, p-value = 0.3015
## 
## 
## $parental_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.49529, df = 2, p-value = 0.7806
## 
## 
## $mother_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.3106, df = 8, p-value = 0.3168
## 
## 
## $father_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.1102, df = 8, p-value = 0.847
## 
## 
## $mother_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 6.1653, df = 8, p-value = 0.6287
## 
## 
## $father_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 10.683, df = 8, p-value = 0.2203
## 
## 
## $reason_school_choice
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 12.356, df = 6, p-value = 0.05448
## 
## 
## $legal_responsibility
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.6798, df = 4, p-value = 0.04618
## 
## 
## $commute_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 13.687, df = 6, p-value = 0.03333
## 
## 
## $weekly_study_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 23.815, df = 6, p-value = 0.0005648
## 
## 
## $extra_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.696, df = 2, p-value = 0.4283
## 
## 
## $parental_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 5.7747, df = 2, p-value = 0.05572
## 
## 
## $private_tutoring
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.60637, df = 2, p-value = 0.7385
## 
## 
## $extracurricular_activities
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.7848, df = 2, p-value = 0.4097
## 
## 
## $attended_daycare
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.2162, df = 2, p-value = 0.3302
## 
## 
## $desire_graduate_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.0739, df = 2, p-value = 0.0291
## 
## 
## $has_internet
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.5606, df = 2, p-value = 0.4583
## 
## 
## $is_dating
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.4615, df = 2, p-value = 0.00882
## 
## 
## $good_family_relationship
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.4854, df = 4, p-value = 0.1124
## 
## 
## $free_time_after_school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 15.161, df = 4, p-value = 0.004379
## 
## 
## $time_with_friends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 24.017, df = 4, p-value = 7.925e-05
## 
## 
## $alcohol_weekdays
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1298, df = 4, p-value < 2.2e-16
## 
## 
## $alcohol_weekends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 171.75, df = 4, p-value < 2.2e-16
## 
## 
## $health_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.2113, df = 4, p-value = 0.3782

Tablas de contingencia Alcohol fin de semana

# Crear tablas de contingencia para cada columna cualitativa y la de cantidad de alcohol en fines de semana
tablas_contingencias_2 <- lapply(base_datos_str, function(col) {
  table(col, base_datos_str$alcohol_weekends)
})

print(tablas_contingencias_2)
## $school
##                       
## col                    High Low Moderate
##   Gabriel Pereira        87 259       77
##   Mousinho da Silveira   45 138       43
## 
## $gender
##         
## col      High Low Moderate
##   Female   37 275       71
##   Male     95 122       49
## 
## $housing_type
##        
## col     High Low Moderate
##   Rural   39 120       38
##   Urban   93 277       82
## 
## $family_size
##          
## col       High Low Moderate
##   Above 3   85 288       84
##   Up to 3   47 109       36
## 
## $parental_status
##                  
## col               High Low Moderate
##   Living Together  117 341      111
##   Separated         15  56        9
## 
## $mother_education
##                         
## col                      High Low Moderate
##   High School              33  75       31
##   Higher Education         35 110       30
##   Lower Secondary School   28 128       30
##   None                      2   3        1
##   Primary School           34  81       28
## 
## $father_education
##                         
## col                      High Low Moderate
##   High School              30  70       31
##   Higher Education         29  80       19
##   Lower Secondary School   39 133       37
##   None                      0   7        0
##   Primary School           34 107       33
## 
## $mother_work
##            
## col         High Low Moderate
##   Health      10  26       12
##   Homemaker   27  84       24
##   other       46 168       44
##   Services    31  75       30
##   Teacher     18  44       10
## 
## $father_work
##            
## col         High Low Moderate
##   Health       6  17        0
##   Homemaker    7  31        4
##   other       71 221       75
##   Services    45  99       37
##   Teacher      3  29        4
## 
## $reason_school_choice
##                    
## col                 High Low Moderate
##   Course Preference   59 181       45
##   Near Home           32  89       28
##   Other               19  39       14
##   Reputation          22  88       33
## 
## $legal_responsibility
##         
## col      High Low Moderate
##   Father   32  94       27
##   Mother   92 280       83
##   Other     8  23       10
## 
## $commute_time
##               
## col            High Low Moderate
##   15 to 30 min   43 130       40
##   30 min to 1h   12  31       11
##   More than 1h    7   9        0
##   Up to 15 min   70 227       69
## 
## $weekly_study_time
##                
## col             High Low Moderate
##   2 to 5h         52 193       60
##   5 to 10h         5  72       20
##   More than 10h    6  25        4
##   Up to 2h        69 107       36
## 
## $extra_educational_support
##      
## col   High Low Moderate
##   No   123 351      107
##   Yes    9  46       13
## 
## $parental_educational_support
##      
## col   High Low Moderate
##   No    64 145       42
##   Yes   68 252       78
## 
## $private_tutoring
##      
## col   High Low Moderate
##   No   120 375      115
##   Yes   12  22        5
## 
## $extracurricular_activities
##      
## col   High Low Moderate
##   No    61 210       63
##   Yes   71 187       57
## 
## $attended_daycare
##      
## col   High Low Moderate
##   No    33  71       24
##   Yes   99 326       96
## 
## $desire_graduate_education
##      
## col   High Low Moderate
##   No    20  35       14
##   Yes  112 362      106
## 
## $has_internet
##      
## col   High Low Moderate
##   No    27 101       23
##   Yes  105 296       97
## 
## $is_dating
##      
## col   High Low Moderate
##   No    87 248       75
##   Yes   45 149       45
## 
## $good_family_relationship
##       
## col    High Low Moderate
##   Fair   26  55       20
##   Good   92 314       91
##   Poor   14  28        9
## 
## $free_time_after_school
##           
## col        High Low Moderate
##   High       66 126       54
##   Low        26 106       20
##   Moderate   40 165       46
## 
## $time_with_friends
##           
## col        High Low Moderate
##   High       95 106       50
##   Low        14 154       25
##   Moderate   23 137       45
## 
## $alcohol_weekdays
##           
## col        High Low Moderate
##   High       26   4        4
##   Low        74 391      107
##   Moderate   32   2        9
## 
## $alcohol_weekends
##           
## col        High Low Moderate
##   High      132   0        0
##   Low         0 397        0
##   Moderate    0   0      120
## 
## $health_status
##       
## col    High Low Moderate
##   Fair   24  78       22
##   Good   85 207       65
##   Poor   23 112       33
# crear una representación gráfica de las tablas de contingencia

lapply(seq_along(tablas_contingencias_2), function(i) {
  mosaicplot(tablas_contingencias_2[[i]],
              color = TRUE,
              xlab = "Alcohol Fin de Semana",
              ylab = names(tablas_contingencias_2[[i]])[2], 
              main = paste("Alcohol Fin de Semana y", names(base_datos_str)[i][1]))
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
## 
## [[17]]
## NULL
## 
## [[18]]
## NULL
## 
## [[19]]
## NULL
## 
## [[20]]
## NULL
## 
## [[21]]
## NULL
## 
## [[22]]
## NULL
## 
## [[23]]
## NULL
## 
## [[24]]
## NULL
## 
## [[25]]
## NULL
## 
## [[26]]
## NULL
## 
## [[27]]
## NULL

Prueba Chi-Cuadrado

# aplicar la prueba de independencia de chi-cuadrado a cada tabla de contingencia
chi_cuadrado_2 <- lapply(tablas_contingencias_2, chisq.test)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi_cuadrado_2
## $school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.085819, df = 2, p-value = 0.958
## 
## 
## $gender
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 69.654, df = 2, p-value = 7.495e-16
## 
## 
## $housing_type
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.14167, df = 2, p-value = 0.9316
## 
## 
## $family_size
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.171, df = 2, p-value = 0.2049
## 
## 
## $parental_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.8628, df = 2, p-value = 0.1449
## 
## 
## $mother_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 10.255, df = 8, p-value = 0.2476
## 
## 
## $father_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 9.9856, df = 8, p-value = 0.266
## 
## 
## $mother_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.0431, df = 8, p-value = 0.532
## 
## 
## $father_work
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 18.679, df = 8, p-value = 0.01668
## 
## 
## $reason_school_choice
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 6.8145, df = 6, p-value = 0.3383
## 
## 
## $legal_responsibility
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.0722, df = 4, p-value = 0.8987
## 
## 
## $commute_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 8.0027, df = 6, p-value = 0.2379
## 
## 
## $weekly_study_time
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 37.497, df = 6, p-value = 1.409e-06
## 
## 
## $extra_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.4215, df = 2, p-value = 0.298
## 
## 
## $parental_educational_support
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 6.8137, df = 2, p-value = 0.03314
## 
## 
## $private_tutoring
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.0945, df = 2, p-value = 0.2128
## 
## 
## $extracurricular_activities
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1.8354, df = 2, p-value = 0.3994
## 
## 
## $attended_daycare
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 3.1753, df = 2, p-value = 0.2044
## 
## 
## $desire_graduate_education
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 4.3507, df = 2, p-value = 0.1136
## 
## 
## $has_internet
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 2.7657, df = 2, p-value = 0.2509
## 
## 
## $is_dating
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 0.53281, df = 2, p-value = 0.7661
## 
## 
## $good_family_relationship
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 5.057, df = 4, p-value = 0.2815
## 
## 
## $free_time_after_school
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 18.865, df = 4, p-value = 0.0008356
## 
## 
## $time_with_friends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 94.004, df = 4, p-value < 2.2e-16
## 
## 
## $alcohol_weekdays
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 171.75, df = 4, p-value < 2.2e-16
## 
## 
## $alcohol_weekends
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 1298, df = 4, p-value < 2.2e-16
## 
## 
## $health_status
## 
##  Pearson's Chi-squared test
## 
## data:  X[[i]]
## X-squared = 7.4814, df = 4, p-value = 0.1125
# asegurarse que los datos sean factores

base_datos_str[] <- lapply(base_datos_str, as.factor)

# generar la V de Cramer para cada columna con alcohol_weekdays

v_cramer_entre_semana <- sapply(base_datos_str, function(col) {
  cramerV(base_datos_str$alcohol_weekdays, col, ci = FALSE, conf = 0.95, type = "perc", R = 1000, histogram = FALSE, digits = 4, bias.correct = FALSE, reportIncomplete = FALSE, verbose = FALSE, tolerance = 1e-16)
})

print(v_cramer_entre_semana)
##                       school.Cramer V                       gender.Cramer V 
##                               0.07869                               0.24650 
##                 housing_type.Cramer V                  family_size.Cramer V 
##                               0.08013                               0.06078 
##              parental_status.Cramer V             mother_education.Cramer V 
##                               0.02763                               0.08469 
##             father_education.Cramer V                  mother_work.Cramer V 
##                               0.05627                               0.06892 
##                  father_work.Cramer V         reason_school_choice.Cramer V 
##                               0.09072                               0.09757 
##         legal_responsibility.Cramer V                 commute_time.Cramer V 
##                               0.08636                               0.10270 
##            weekly_study_time.Cramer V    extra_educational_support.Cramer V 
##                               0.13550                               0.05112 
## parental_educational_support.Cramer V             private_tutoring.Cramer V 
##                               0.09433                               0.03057 
##   extracurricular_activities.Cramer V             attended_daycare.Cramer V 
##                               0.05244                               0.05844 
##    desire_graduate_education.Cramer V                 has_internet.Cramer V 
##                               0.10440                               0.04904 
##                    is_dating.Cramer V     good_family_relationship.Cramer V 
##                               0.12070                               0.07594 
##       free_time_after_school.Cramer V            time_with_friends.Cramer V 
##                               0.10810                               0.13600 
##             alcohol_weekdays.Cramer V             alcohol_weekends.Cramer V 
##                               1.00000                               0.36380 
##                health_status.Cramer V 
##                               0.05696
# generar la V de Cramer para cada columna con alcohol_weekends

v_cramer_fin_semana <- sapply(base_datos_str, function(col) {
  cramerV(base_datos_str$alcohol_weekends, col, ci = FALSE, conf = 0.95, type = "perc", R = 1000, histogram = FALSE, digits = 4, bias.correct = FALSE, reportIncomplete = FALSE, verbose = FALSE, tolerance = 1e-16)
})

print(v_cramer_fin_semana)
##                       school.Cramer V                       gender.Cramer V 
##                               0.01150                               0.32760 
##                 housing_type.Cramer V                  family_size.Cramer V 
##                               0.01477                               0.06990 
##              parental_status.Cramer V             mother_education.Cramer V 
##                               0.07715                               0.08889 
##             father_education.Cramer V                  mother_work.Cramer V 
##                               0.08771                               0.07366 
##                  father_work.Cramer V         reason_school_choice.Cramer V 
##                               0.12000                               0.07246 
##         legal_responsibility.Cramer V                 commute_time.Cramer V 
##                               0.02874                               0.07852 
##            weekly_study_time.Cramer V    extra_educational_support.Cramer V 
##                               0.17000                               0.06108 
## parental_educational_support.Cramer V             private_tutoring.Cramer V 
##                               0.10250                               0.06905 
##   extracurricular_activities.Cramer V             attended_daycare.Cramer V 
##                               0.05318                               0.06995 
##    desire_graduate_education.Cramer V                 has_internet.Cramer V 
##                               0.08188                               0.06528 
##                    is_dating.Cramer V     good_family_relationship.Cramer V 
##                               0.02865                               0.06242 
##       free_time_after_school.Cramer V            time_with_friends.Cramer V 
##                               0.12060                               0.26910 
##             alcohol_weekdays.Cramer V             alcohol_weekends.Cramer V 
##                               0.36380                               1.00000 
##                health_status.Cramer V 
##                               0.07592